
# ---- Strip identifying columns and save working copies ----
# Columns removed from both voter files before they are re-saved. Using one
# shared list keeps the two "_anon" outputs consistent: the original code
# dropped `num` and `rest` from the first file only. any_of() ignores names
# that a given file does not contain.
# NOTE(review): confirm whether "temp/vf_later_mn.rds" carries `num`/`rest`.
id_cols <- c("LALVOTERID", "surname",
             "street", "num", "rest", "Residence_Addresses_City",
             "cong", "Voters_BirthDate")

# Rows with treated > 0 from the main file.
k <- readRDS("temp/vf_mn.rds") |> 
  filter(treated > 0)

# Every row of the "later" file is assigned treated = 0.
lat <- readRDS("temp/vf_later_mn.rds") |> 
  mutate(treated = 0)

saveRDS(select(k, -any_of(id_cols)), "temp/vf_mn_anon.rds")

saveRDS(select(lat, -any_of(id_cols)), "temp/vf_later_mn_anon.rds")

##########################################
##########################################
##########################################
##########################################

# ---- Combine the anonymized files and recode dates ----
# Stack both files, attach a randomly permuted row number as `voter_id`,
# apply the sample restrictions, and express both dates as days since
# 2010-01-01.
k <- bind_rows(
  readRDS("temp/vf_mn_anon.rds"),
  readRDS("temp/vf_later_mn_anon.rds")
) |> 
  ungroup() |> 
  mutate(voter_id = sample(seq_len(n()), n())) |> 
  filter(
    # fewer than three deaths, or the count is missing
    is.na(n_dead) | n_dead < 3,
    # death on or before the end of 2020; rows with treated == 0 always pass
    treated == 0 | death_date <= "2020-12-31"
  ) |> 
  rename(deced_age = age) |> 
  mutate(
    reg_date = as.numeric(
      as.Date(reg_date, "%m/%d/%Y") - as.Date("2010-01-01")
    ),
    death_date = as.numeric(death_date - as.Date("2010-01-01"))
  )



# ---- First comparison: treated == 2 vs. treated == 1 ----
# Keep rows with treated > 0; `t` flags the treated == 2 rows.
first_set <- k |> 
  filter(treated > 0) |> 
  mutate(t = (treated == 2))

# Keep only rows with no missing value in any of the covariates below.
first_set <- first_set[
  first_set |> 
    select(lat, lon, voter_age,
           reg_date, deced_age, death_date, n_dead,
           General_2012_11_06, General_2014_11_04,
           General_2016_11_08, General_2018_11_06,
           starts_with("pred."),
           male,
           median_income, some_college, pop_dens) |> 
    complete.cases(),
]

# Covariate matrix handed to ebalance(): the same columns, in the same
# order, with pred.other and pred.aian removed.
X <- first_set |> 
  select(lat, lon, voter_age,
         reg_date, deced_age, death_date, n_dead,
         General_2012_11_06, General_2014_11_04,
         General_2016_11_08, General_2018_11_06,
         starts_with("pred."),
         male,
         median_income, some_college, pop_dens) |> 
  select(-pred.other, -pred.aian)

mb <- ebalance(first_set$t, X)

# Rows with t == TRUE get weight 1; the remaining rows receive mb$w in their
# existing row order.
first_set <- bind_rows(
  mutate(filter(first_set, t), weight = 1),
  mutate(filter(first_set, !t), weight = mb$w)
)

# Weighted mean of each "General_*" column by group; the year is read from
# characters 9-12 of the column name.
ll <- first_set |> 
  group_by(t) |> 
  summarize(across(starts_with("Gener"), ~ weighted.mean(.x, weight))) |> 
  pivot_longer(cols = starts_with("Gen")) |> 
  mutate(name = as.integer(substr(name, 9, 12)))

# Same summary without weights.
ll2 <- first_set |> 
  group_by(t) |> 
  summarize(across(starts_with("Gener"), mean)) |> 
  pivot_longer(cols = starts_with("Gen")) |> 
  mutate(name = as.integer(substr(name, 9, 12)))

ggplot(ll, aes(x = name, y = value, color = t)) +
  geom_line()

###################################################

# ---- Second comparison: treated == 2 vs. treated == 0 ----
# Drop the treated == 1 rows; `t` flags the treated == 2 rows.
second_set <- k |> 
  filter(treated != 1) |> 
  mutate(t = (treated == 2))

# Keep only rows with no missing value in any of the covariates below
# (death_date is not part of this list).
second_set <- second_set[
  second_set |> 
    select(lat, lon, voter_age,
           reg_date, deced_age, n_dead,
           General_2012_11_06, General_2014_11_04,
           General_2016_11_08, General_2018_11_06,
           starts_with("pred."),
           male,
           median_income, some_college, pop_dens) |> 
    complete.cases(),
]

# Covariate matrix handed to ebalance(): the same columns, in the same
# order, with pred.other and pred.aian removed.
X <- second_set |> 
  select(lat, lon, voter_age,
         reg_date, deced_age, n_dead,
         General_2012_11_06, General_2014_11_04,
         General_2016_11_08, General_2018_11_06,
         starts_with("pred."),
         male,
         median_income, some_college, pop_dens) |> 
  select(-pred.other, -pred.aian)

mb <- ebalance(second_set$t, X)

# Rows with t == TRUE get weight 1; the remaining rows receive mb$w in their
# existing row order.
second_set <- bind_rows(
  mutate(filter(second_set, t), weight = 1),
  mutate(filter(second_set, !t), weight = mb$w)
)

# Weighted mean of each "General_*" column by group; the year is read from
# characters 9-12 of the column name.
ll <- second_set |> 
  group_by(t) |> 
  summarize(across(starts_with("Gener"), ~ weighted.mean(.x, weight))) |> 
  pivot_longer(cols = starts_with("Gen")) |> 
  mutate(name = as.integer(substr(name, 9, 12)))

ggplot(ll, aes(x = name, y = value, color = t)) +
  geom_line()

##########################

# ---- Long voter-by-election panel and turnout figure ----
# Stack all of first_set (treated 1 and 2) with the t == FALSE rows of
# second_set (treated 0), then reshape to one row per voter per election.
# `year` is read from characters 9-12 of the "General_*" column name.
full <- bind_rows(
  first_set,
  filter(second_set, !t)
) |> 
  pivot_longer(starts_with("Gener")) |> 
  mutate(name = as.integer(substring(name, 9, 12)),
         t1 = treated > 0,
         t2 = treated > 1) |> 
  rename(year = name,
         turnout = value)

# Weighted and unweighted mean turnout by group and year, in long form so
# that the two versions can be drawn as facets. `.groups = "drop_last"`
# spells out the grouping summarize() would otherwise apply by default, so
# the saved object is unchanged.
ll <- full |> 
  group_by(treated, year) |> 
  summarize(`Weighted` = weighted.mean(turnout, weight),
            `Unweighted` = mean(turnout),
            .groups = "drop_last") |> 
  pivot_longer(cols = c(Weighted, Unweighted),
               names_to = "pan", values_to = "turnout") |> 
  mutate(group = case_when(treated == 0 ~ "No Household Deaths",
                           treated == 1 ~ "Household Non-Covid Death",
                           treated == 2 ~ "Household Covid Death"))

saveRDS(ll, "temp/weighted_full_ll_mn.rds")

# The caption lists the covariates that are actually passed to ebalance() in
# this script.
# NOTE(review): the earlier caption also named party affiliation and block
# group Covid death rates, neither of which is in either covariate matrix,
# and left out prior turnout, which is. Confirm the wording against the
# manuscript.
ggplot(ll, aes(x = year, y = turnout, linetype = group, shape = group)) +
  geom_line() +
  geom_point() +
  facet_grid(. ~ pan) +
  theme_bc(legend.position = "bottom") +
  guides(linetype = guide_legend(title.position = "top", title.hjust = 0.5),
         shape = guide_legend(title.position = "top", title.hjust = 0.5)) +
  labs(x = "Year", y = "Turnout\n(share of voters registered in 2020)",
       linetype = "Treatment Group",
       shape = "Treatment Group",
       caption = "'No Household Deaths' are weighted to mirror 'Household Covid Death' using entropy balancing. Balancing covariates include decedent's age, number of household deaths, latitude, longitude, voter's age, registration date, turnout in the 2012-2018 general elections, BISG racial predictions, gender, block group median income, block group education, and block group population density. 'Household Non-Covid Death' are weighted using the preceding covariates, along with decedent's date of death.") +
  scale_y_continuous(labels = scales::percent)

ggsave("temp/first_mn.png", width = 6, height = 4.5, units = "in")

#############################

# ---- Regression models ----
# Weighted two-way fixed-effects model (year and voter) on the full panel,
# with standard errors clustered by voter and by year.
m1 <- fixest::feols(
  turnout ~ t1 * t2 * I(year == 2020) | year + voter_id,
  full,
  cluster = c("voter_id", "year"),
  weights = full$weight
)

saveRDS(m1, "temp/mn_model_1.rds")

# Re-fit the same specification once per election year y in 2014, 2016,
# 2018, 2020, using only elections up to y, and stack the confidence
# intervals.
ests <- rbindlist(lapply(seq(2014, 2020, 2), function(y) {
  panel_to_y <- filter(full, year <= y)

  ci <- confint(fixest::feols(
    turnout ~ t1 * t2 * I(year == y) | year + voter_id,
    panel_to_y,
    cluster = c("voter_id", "year"),
    weights = panel_to_y$weight
  ))

  ci$year <- y
  # Labels are assigned by position; this assumes confint() returns exactly
  # two rows, in this order -- TODO confirm.
  ci$est <- c("any death", "covid death")

  ci
}))

saveRDS(ests, "temp/es_mn.rds")

######################################
######################################
######################################
######################################

# ---- Balance table (LaTeX) ----
# Covariate means by treatment group, with and without the entropy-balancing
# weights, computed on the 2012 rows of the long panel.
full <- as.data.table(filter(full, year == 2012))

# Covariates reported in the table.
balance_vars <- c("reg_date",
                  "deced_age",
                  "death_date",
                  "n_dead",
                  "pred.white", "pred.black", "pred.hisp", "pred.asian",
                  "male", "voter_age",
                  "median_income", "some_college", "pop_dens")

# Column order of each half of the table. The header assigned further down
# is built from this same vector, so labels and data cannot drift apart.
group_labels <- c("Household Covid Death",
                  "Household Non-Covid Death",
                  "No Household Death")

# Format a table of per-group means for display and reshape it to one row per
# covariate and one column per treatment group.
#   means: one row per value of `treated`, one numeric column per covariate.
# Returns `name` plus the three group columns, in `group_labels` order.
format_balance_means <- function(means) {
  means |> 
    mutate(
      # dates are stored as days since 2010-01-01
      across(c(reg_date, death_date),
             ~ format((as.Date("2010-01-01") + .), "%B %d, %Y")),
      across(c(deced_age, n_dead, pop_dens, voter_age),
             ~ scales::comma(., accuracy = .1)),
      across(c(starts_with("pred"), male, some_college),
             ~ scales::percent(., accuracy = .1)),
      # kable() is called with escape = FALSE, so a bare "$" would open math
      # mode in the .tex output; emit "\$" instead.
      across(c(median_income),
             ~ scales::dollar(., accuracy = 1, prefix = "\\$"))
    ) |> 
    pivot_longer(cols = -treated) |> 
    mutate(treated = case_when(treated == 0 ~ "No Household Death",
                               treated == 1 ~ "Household Non-Covid Death",
                               TRUE ~ "Household Covid Death")) |> 
    pivot_wider(names_from = treated, values_from = value) |> 
    # fix the column order explicitly instead of relying on row order
    select(name, all_of(group_labels))
}

unw <- format_balance_means(
  full[, by = list(treated),
       c(lapply(.SD, mean)),
       .SDcols = balance_vars]
)

w <- format_balance_means(
  full[, by = list(treated),
       c(lapply(.SD, weighted.mean, weight)),
       .SDcols = balance_vars]
)

# Unweighted columns get the ".x" suffix and weighted columns ".y". The
# death_date cell is blanked in both "No Household Death" columns.
tab <- left_join(unw, w, by = "name") |> 
  mutate(across(c(starts_with("No H")), ~ ifelse(name == "death_date", "", .)))

# Display labels (`clean`) and row order (`order`) for each covariate.
vo <- fread("raw_data/var_orders.csv")

tab <- left_join(tab, vo, by = "name") |> 
  arrange(order) |> 
  select(clean, everything(), -name, -order)

# Escape percent signs for LaTeX.
tab <- mutate(tab, across(everything(), ~ gsub("%", "\\\\%", .)))

colnames(tab) <- c(" ", rep(group_labels, 2))

options(knitr.kable.NA = "")

# Shade every even-numbered row; this is a no-op for tables with fewer than
# two rows.
even_rows <- seq_len(nrow(tab)) %% 2 == 0
tab$` `[even_rows] <- paste0("\\rowcolor{Gray}", tab$` `[even_rows])

kable(tab, "latex", caption = "\\label{tab:full-bal} Balance Table for Entropy Balancing",
      linesep = "", align = c("l", rep("c", 6)),
      booktabs = TRUE, escape = FALSE) |> 
  add_header_above(c(" " = 1, "Means: Unweighted Data" = 3, "Means: Weighted Data" = 3), align = "c") |> 
  column_spec(c(1), width = "4cm") |>
  column_spec(c(2:7), width = "3cm") |>
  kable_styling(latex_options = c("scale_down")) |> 
  save_kable("temp/balance_mn.tex")

